import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
import plotly.express as px
import matplotlib.pyplot as pp
import seaborn
import matplotlib
import plotly
import plotly.offline as py
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import random
import math
import time
import datetime
import operator
import warnings
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from plotly.graph_objs import Scatter, Layout, Figure, Data, Stream, YAxis, Marker, Bar
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
# The global confirmed-cases file is parsed once; every frame derived from it
# below is built from that single in-memory copy instead of re-parsing the
# same CSV again for each variable.
df_confirm = pd.read_csv('CovidDataConfirmGlobal.csv')

# Same data with shorter names for the two identifier columns.
# `rename` returns a new frame, so `df_confirm` keeps its original headers.
confirm = df_confirm.rename(columns={"Country/Region": "country", "Province/State": "province"})

# Positional column subsets (independent copies so later in-place edits of one
# frame cannot leak into another).
# NOTE(review): the positions 322/323 are hard-coded; code further down labels
# them '12/5/20' and '12/6/20' - confirm this still matches the CSV layout.
data_confirm_global_use1 = df_confirm.iloc[:, [0, 1, 2, 3, 323]].copy()
data_confirm_global_use2 = df_confirm.iloc[:, [0, 1, 2, 3, 322, 323]].copy()
data_confirm_global_use3 = df_confirm.iloc[:, [1, 322, 323]].copy()

df_death = pd.read_csv('CovidDataDeathGlobal.csv')
# NOTE(review): despite its name, `df_infection` is loaded from the US
# *deaths* time series file.
df_infection = pd.read_csv('time_series_covid_19_deaths_US.csv')
df_recover = pd.read_csv('CovidDataRecoveredGlobal.csv')

# Second set of time series files, used by the forecasting code.
confirmed_df = pd.read_csv('time_series_covid_19_confirmed.csv')
deaths_df = pd.read_csv('time_series_covid_19_deaths.csv')
recoveries_df = pd.read_csv('time_series_covid_19_recovered.csv')
# Line chart of the confirmed total over time, summed over every row of
# `confirm`.
# Every column that is not an identifier/coordinate column is treated as a
# date column.
non_date_columns = ['province', 'country', 'Lat', 'Long']
cols = [c for c in confirm.columns if c not in non_date_columns]
confirm_date = confirm[cols]

# One total per date column (DataFrame.sum skips missing values, if any).
confirm_sum = confirm_date.sum()

x_data = np.array(cols)
y_data = confirm_sum

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=x_data,
    y=y_data,
    mode='lines+markers',
    name="Confirm",
    line=dict(color="blue", width=2),
    # `y_data` is indexed by the date labels, so the last entry has to be
    # taken positionally with `.iloc`; plain `y_data[-1]` relies on a
    # deprecated integer fallback for label-indexed Series.
    text="Total confirm:" + str(y_data.iloc[-1]),
))
fig.show()
# Bubble chart of the ten rows of `confirm` with the largest value in the
# '12/6/20' column; bubble size and vertical position both encode that value.
snapshot_column = "12/6/20"
largest_ten_confirm = confirm.nlargest(10, [snapshot_column])

bubble_options = dict(
    x="country",
    y=snapshot_column,
    size=snapshot_column,
    color="country",
    hover_name="country",
    size_max=100,
)
fig = px.scatter(largest_ten_confirm, **bubble_options)
fig.show()
def plot_for_country(country):
    """Plot the confirmed-case curve for one country, or for the whole world.

    Args:
        country: Value matched for equality against the ``country`` column of
            the module-level ``confirm`` frame. The special value ``"World"``
            (case-insensitive, surrounding whitespace ignored) selects every
            row instead of filtering.

    Side effects:
        Builds a plotly figure and displays it with ``fig.show()``.

    Note:
        Before this special case existed, the default ``"World"`` was only
        compared literally against the country column, so unless the data
        contains such a row the trace was a flat line of zeros.
    """
    labels = ["Confirm"]
    colors = ["red"]
    line_size = [4, 5]
    df_list = [confirm]

    whole_world = str(country).strip().lower() == "world"

    fig = go.Figure()
    for i, df in enumerate(df_list):
        # Columns from position 4 onwards are used as the per-date values.
        date_frame = df.iloc[:, 4:]
        x_data = np.array(list(date_frame.columns))
        if not whole_world:
            date_frame = date_frame[df['country'] == country]
        # Sum over all selected rows (e.g. several provinces of one country).
        y_data = np.sum(np.asarray(date_frame), axis=0)
        fig.add_trace(go.Scatter(
            x=x_data,
            y=y_data,
            mode='lines+markers',
            name=labels[i],
            line=dict(color=colors[i], width=line_size[i]),
            connectgaps=True,
            text=country + "Total" + str(labels[i]) + ":" + str(y_data[-1]),
        ))
    fig.show()


# Interactive text box; the initial value shows the worldwide curve.
interact(plot_for_country, country="World")
# Map with one marker per row of `data_confirm_global_use1`; the hover box
# shows country, province and the value of the last selected column.

# Move 'Country/Region' to the front, keeping the remaining columns in order.
cols = list(data_confirm_global_use1.columns)
cols.remove('Country/Region')
cols = ['Country/Region', *cols]

col_name = ['Country', 'Province', 'Lat', 'Long', 'cases']
data_confirm_global_use1 = (
    data_confirm_global_use1.loc[:, cols]
    .set_axis(col_name, axis=1)
    # Rows without a province get the literal placeholder 'Null'.
    .assign(Province=lambda frame: frame['Province'].fillna('Null'))
)

total_map_options = dict(
    lat="Lat",
    lon="Long",
    zoom=1,
    hover_data=["Country", "Province", "cases"],
    mapbox_style="carto-positron",
    range_color=[0, 100],
    title='Latest Number of Total Confirmed Cases throughout the World',
)
fig = px.scatter_mapbox(data_confirm_global_use1, **total_map_options)
fig.update_layout(margin=dict(r=0, t=30, l=0, b=0), mapbox={})
fig.show()
# Map with one marker per row of `data_confirm_global_use2`; the hover value
# 'cases' is the difference between the two selected count columns
# (later column minus earlier column).

# Move 'Country/Region' to the front, keeping the remaining columns in order.
cols2 = list(data_confirm_global_use2.columns)
cols2.remove('Country/Region')
cols2 = ['Country/Region', *cols2]

col_name2 = ['Country', 'Province', 'Lat', 'Long', 'cases1', 'cases2']
reordered = data_confirm_global_use2.loc[:, cols2].set_axis(col_name2, axis=1)
day_over_day = reordered["cases2"] - reordered["cases1"]

data_confirm_global_use2 = (
    reordered.drop(columns=['cases1', 'cases2'])
    .assign(
        cases=day_over_day,
        # Rows without a province get the literal placeholder 'Null'.
        Province=lambda frame: frame['Province'].fillna('Null'),
    )
)

new_cases_map_options = dict(
    lat="Lat",
    lon="Long",
    zoom=1,
    hover_data=["Country", "Province", "cases"],
    mapbox_style="carto-positron",
    range_color=[0, 100],
    title='Latest Number of Confirmed Cases throughout the World',
)
fig2 = px.scatter_mapbox(data_confirm_global_use2, **new_cases_map_options)
fig2.update_layout(margin=dict(r=0, t=30, l=0, b=0), mapbox={})
fig2.show()
# Horizontal bar chart of the 50 countries with the largest difference
# between the '12/6/20' and '12/5/20' columns.
col_name = ['Country', '12/5/20', '12/6/20']
data_confirm_global_use3.columns = col_name

# Collapse rows that share a country into one row per country.
per_country = data_confirm_global_use3.groupby(['Country'], sort=False, as_index=False).sum()

data_confirm_global_compare = (
    per_country.assign(cases=per_country["12/6/20"] - per_country["12/5/20"])
    .drop(columns=['12/6/20', '12/5/20'])
    # Ascending sort + tail keeps the 50 largest, largest last.
    .sort_values(by='cases', ascending=True)
    .tail(50)
)

data = go.Bar(
    x=data_confirm_global_compare.cases,
    y=data_confirm_global_compare.Country,
    orientation='h',
)
layout = go.Layout(height=1000, title="Comparsion of new cases on 2020/ 12/ 6")
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)
# Deaths per country in the '12/2/20' column: pie chart of the top 10 and a
# horizontal bar chart of the top 30.
df_1 = df_death.drop(columns=['Province/State', 'Lat', 'Long'])

# One row per country; both charts are cut from the same descending ranking.
df_sort = df_1.groupby(['Country/Region'], as_index=False, sort=False).sum()
df_sort1 = df_sort[['Country/Region', '12/2/20']]
deaths_ranked = df_sort1.sort_values(by='12/2/20', ascending=False)

# --- top 10: pie chart ---
df_a = deaths_ranked.head(10)
labels = df_a['Country/Region']
size = df_a['12/2/20']
plt.pie(size, labels=labels, autopct='%.2f')
plt.title("Top 10 Deaths Pie Chart as of 12/2/2020")
plt.show()

# --- top 30: horizontal bar chart ---
df_a = deaths_ranked.head(30)
labels = df_a['Country/Region']
size = df_a['12/2/20']
f, ax = plt.subplots(figsize=(10, 20))
sns.barplot(x="12/2/20", y="Country/Region", data=df_a, orient="h")
ax.set(xlabel='Top 30 Deaths as of 12/2/20', ylabel='Country')
plt.show()
# Confirmed cases per country in the '12/2/20' column: pie chart of the top 10
# and a horizontal bar chart of the top 30.
df_d = df_confirm.drop(['Province/State', 'Lat', 'Long'], axis=1)

# Aggregate the trimmed frame `df_d`, not the raw `df_confirm`: summing the
# raw frame would also try to "sum" the text column 'Province/State' and the
# coordinates, which is wasted work and, depending on the pandas version, can
# fail or produce concatenated strings. Only the '12/2/20' column is used
# below, so the plotted values are unchanged.
df_sort = df_d.groupby(['Country/Region'], as_index=False, sort=False).sum()
df_sort1 = df_sort[['Country/Region', '12/2/20']]
confirmed_ranked = df_sort1.sort_values(by='12/2/20', ascending=False)

# --- top 10: pie chart ---
df_a = confirmed_ranked.head(10)
labels = df_a['Country/Region']
size = df_a['12/2/20']
plt.pie(size, labels=labels, autopct='%.2f')
plt.title("Top 10 Confirm Pie Chart as of 12/2/2020")
plt.show()

# --- top 30: horizontal bar chart ---
df_a = confirmed_ranked.head(30)
labels = df_a['Country/Region']
size = df_a['12/2/20']
f, ax = plt.subplots(figsize=(10, 20))
sns.barplot(x="12/2/20", y="Country/Region", data=df_a, orient="h")
ax.set(xlabel='Top 30 Confirmed Cases as of 12/2/20', ylabel='Country')
plt.show()
# Deaths divided by confirmed cases, per country and date, plotted as a
# percentage for six selected countries over column positions 300-319.
location_columns = ['Province/State', 'Lat', 'Long']
df_death_1 = df_death.drop(columns=location_columns)
df_confirm_1 = df_confirm.drop(columns=location_columns)

# Country totals; the result is indexed by 'Country/Region'.
df_2 = df_death_1.groupby(['Country/Region'], sort=False).sum()
df_3 = df_confirm_1.groupby(['Country/Region'], sort=False).sum()

# Element-wise ratio; undefined entries (NaN) are replaced by 0.
death_rate = df_2.div(df_3)
Death_rate = death_rate.fillna(0)

countries_of_interest = ["Australia", "China", "Canada", "Malaysia", "US", "Japan"]
death = Death_rate.loc[countries_of_interest, :]

# Dates become rows and countries become columns, scaled to percent.
death1 = death.iloc[:, 300:320].T * 100

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
sns.lineplot(data=death1)
ax.set_xlabel('Date')
ax.set_ylabel('Percentage of Death Rate')
plt.show()
# Per-state counts from `df_infection` divided by state population, plotted as
# a percentage for six selected states over column positions 300-319.
# NOTE(review): the variable names and axis label say "infection"; confirm
# which quantity `df_infection` actually holds.
metadata_columns = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2',
                    'Country_Region', 'Lat', 'Long_', 'Combined_Key']
df_infection_1 = df_infection.drop(columns=metadata_columns)

# State totals; the result is indexed by 'Province_State'.
df_2 = df_infection_1.groupby(['Province_State'], sort=False).sum()
df_2.head(40)  # bare expression: no effect when run as a script

# Drop states with zero population so the division below is well defined.
zero_population_states = df_2[df_2['Population'] == 0].index
df_2.drop(zero_population_states, inplace=True)
df_2.iloc[0:, 1:]  # bare expression: no effect when run as a script

# Everything after the first column is divided row-wise by the population.
# NOTE(review): assumes 'Population' is the first remaining column - confirm.
per_date_counts = df_2.iloc[:, 1:]
Infection_Death_Rate_of_Population = per_date_counts.div(df_2['Population'], axis=0).fillna(0)

states_of_interest = ["Texas", "Wisconsin", "California", "Virginia", "Washington", "New York"]
Infection = Infection_Death_Rate_of_Population.loc[states_of_interest, :]

# Dates become rows and states become columns, scaled to percent.
Infection1 = Infection.iloc[:, 300:320].T * 100

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
sns.lineplot(data=Infection1)
ax.set_xlabel('Date')
ax.set_ylabel('Percentage of Infection Rate')
plt.show()
# Recoveries divided by confirmed cases, per country and date, plotted for six
# selected countries over column positions 300-319 (as a fraction, not %).
df_confirm_1 = df_confirm.drop(columns=['Province/State', 'Lat', 'Long'])

# The recovered file carries five extra trailing date columns that are
# removed together with the location columns.
recover_columns_to_drop = ['Province/State', 'Lat', 'Long',
                           '12/7/20', '12/8/20', '12/9/20', '12/10/20', '12/11/20']
df_recover_1 = df_recover.drop(columns=recover_columns_to_drop)

# Country totals; the result is indexed by 'Country/Region'.
df_3 = df_confirm_1.groupby(['Country/Region'], sort=False).sum()
df_4 = df_recover_1.groupby(['Country/Region'], sort=False).sum()

# Element-wise ratio; undefined entries (NaN) are replaced by 0.
recover_rate = df_4.div(df_3).fillna(0)

countries_of_interest = ["Australia", "China", "Canada", "Malaysia", "US", "Japan"]
recover = recover_rate.loc[countries_of_interest, :]

# Dates become rows and countries become columns.
recover1 = recover.iloc[:, 300:320].T

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
sns.lineplot(data=recover1)
ax.set_xlabel('Date')
ax.set_ylabel('Recovery Rate')
plt.show()
# ---------------------------------------------------------------------------
# Forecasting setup: plotting style, daily totals and the day-index axes.
# ---------------------------------------------------------------------------
# Newer Matplotlib releases ship the seaborn style sheets under the
# "seaborn-v0_8" name and no longer accept plain 'seaborn'; fall back to the
# old name when the new one is not available.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# Python file. Request the inline backend only when running under IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Column 4 onwards of the confirmed file are used as the per-date columns;
# the same label range is sliced out of the deaths and recoveries files.
columns = confirmed_df.keys()
confirmed = confirmed_df.loc[:, columns[4]:columns[-1]]
deaths = deaths_df.loc[:, columns[4]:columns[-1]]
recoveries = recoveries_df.loc[:, columns[4]:columns[-1]]
dates = confirmed.keys()

# Totals per date, summed over all rows (replaces a per-column Python loop).
confirmed_totals = confirmed[dates].sum()
death_totals = deaths[dates].sum()
recovered_totals = recoveries[dates].sum()

# Deaths / confirmed for each date, kept as a plain list.
mortality_rate = (death_totals / confirmed_totals).tolist()

# Column vectors of shape (n_dates, 1), the layout scikit-learn expects for X.
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)
world_cases = confirmed_totals.to_numpy().reshape(-1, 1)
total_deaths = death_totals.to_numpy().reshape(-1, 1)
total_recovered = recovered_totals.to_numpy().reshape(-1, 1)

# Day indices extended `days_in_future` days past the last observed date.
days_in_future = 15
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Observed part only; tied to `days_in_future` instead of a second literal 15
# so the two cannot drift apart.
adjusted_dates = future_forcast[:-days_in_future]

# Calendar labels for every entry of `future_forcast`, counted from 1/22/2020.
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = [
    (start_date + datetime.timedelta(days=offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forcast))
]
# ---------------------------------------------------------------------------
# Fit a linear regression and a randomized-search-tuned Bayesian ridge model
# on day index -> confirmed total.
# ---------------------------------------------------------------------------
# Chronological split (no shuffling): the last 15% of days form the test set.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, world_cases, test_size=0.15, shuffle=False
)

# The `normalize` argument was removed from LinearRegression in newer
# scikit-learn releases and raises TypeError there. For an unpenalised least
# squares fit, feature scaling does not change the predictions, so it is
# simply dropped.
linear_model = LinearRegression(fit_intercept=True)
linear_model.fit(X_train_confirmed, y_train_confirmed)
test_linear_pred = linear_model.predict(X_test_confirmed)
linear_pred = linear_model.predict(future_forcast)

# Hyper-parameter candidates sampled by the randomized search.
tol = [1e-4, 1e-3, 1e-2]
alpha_1 = [1e-7, 1e-6, 1e-5, 1e-4]
alpha_2 = [1e-7, 1e-6, 1e-5, 1e-4]
lambda_1 = [1e-7, 1e-6, 1e-5, 1e-4]
lambda_2 = [1e-7, 1e-6, 1e-5, 1e-4]

bayesian_grid = {
    'tol': tol,
    'alpha_1': alpha_1,
    'alpha_2': alpha_2,
    'lambda_1': lambda_1,
    'lambda_2': lambda_2,
}

bayesian = BayesianRidge()
bayesian_search = RandomizedSearchCV(
    bayesian,
    bayesian_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    return_train_score=True,
    n_jobs=-1,
    n_iter=40,
    verbose=1,
)
# BayesianRidge expects a 1-D target; flattening the (n, 1) column here avoids
# the column-vector conversion warning on every fit instead of relying on the
# warning filter below, which only takes effect after the fit has run.
bayesian_search.fit(X_train_confirmed, np.ravel(y_train_confirmed))

# Kept from the original script: silences all warnings from this point on.
warnings.filterwarnings('ignore')

bayesian_confirmed = bayesian_search.best_estimator_
test_bayesian_pred = bayesian_confirmed.predict(X_test_confirmed)
bayesian_pred = bayesian_confirmed.predict(future_forcast)
def _label_and_show(title, xlabel, ylabel, label_size):
    """Title and label the current pyplot axes, size the x ticks, then show."""
    plt.title(title, size=label_size)
    plt.xlabel(xlabel, size=label_size)
    plt.ylabel(ylabel, size=label_size)
    plt.xticks(size=15)
    plt.show()


# 1) Observed confirmed totals against the day index.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, world_cases)
_label_and_show(
    'Total Confirm of Coronavirus Cases Over Time',
    'Days Since 1/22/2020',
    'Total Confirm of Cases',
    30,
)

# 2) and 3) Observed totals with each model's dashed prediction curve, which
#    extends over the additional future day indices.
prediction_figures = (
    (
        linear_pred,
        'orange',
        'Linear Regression Prediction of Coronavirus Cases Over Time',
        'Days Since 1/22/2020',
        'Linear Regression Prediction of Cases',
        ['Confirmed Cases', 'Linear Regression Predictions'],
    ),
    (
        bayesian_pred,
        'green',
        'Bayesian Ridge Regression Prediction of Coronavirus Cases Over Time',
        'Time',
        'Bayesian Ridge Regression Prediction of Cases',
        ['Confirmed Cases', 'Bayesian Ridge Regression Predictions'],
    ),
)
for predicted, dash_color, figure_title, x_text, y_text, legend_entries in prediction_figures:
    plt.figure(figsize=(20, 12))
    plt.plot(adjusted_dates, world_cases)
    plt.plot(future_forcast, predicted, linestyle='dashed', color=dash_color)
    plt.legend(legend_entries)
    _label_and_show(figure_title, x_text, y_text, 30)

# 4) Deaths (red) and recoveries (green) against the day index.
plt.figure(figsize=(10, 7))
plt.plot(adjusted_dates, total_deaths, color='r')
plt.plot(adjusted_dates, total_recovered, color='green')
plt.legend(['Deaths', 'Recoveries'], loc='best', fontsize=20)
_label_and_show(
    'Death and Recoveries of Coronavirus Cases',
    'Time',
    'Deaths and Recovery of Cases',
    20,
)